library(tidyverse)
library(readxl)
library(performance)
library(emmeans)
library(gtsummary)

Reading in the data

In this R Markdown file, the Excel file that is read in is called analytic_data.xlsx, and the resulting data frame is called EXAMPLE_DATA. Replace these with the names of the file and data frame you wish to use.

# Read the workbook and convert every character column to a factor.
# across(where(...)) replaces the superseded scoped verb mutate_if().
EXAMPLE_DATA <- read_excel("analytic_data.xlsx") %>%
  mutate(across(where(is.character), as.factor))

In all of the code below, replace EXAMPLE_DATA with the name of your data frame, and replace the placeholder variable names (such as NUMERICAL_VARIABLE1 and FACTOR1) with the corresponding variables in your data.

Simple linear model with categorical predictor

# Fit a linear model of NUMERICAL_VARIABLE1 on the single predictor FACTOR1.
MY.MODEL <- lm(
  formula = NUMERICAL_VARIABLE1 ~ FACTOR1,
  data = EXAMPLE_DATA
)

A range of ways to explore the model

# Coefficient table (estimate, SE, t, p) plus residual summary, residual
# standard error, R-squared and the overall F-test.
summary(MY.MODEL)
## 
## Call:
## lm(formula = NUMERICAL_VARIABLE1 ~ FACTOR1, data = EXAMPLE_DATA)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -3.140 -2.065 -0.090  1.100  5.360 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     4.940e+00  1.061e+00   4.656 0.000263 ***
## FACTOR1Group 2 -1.120e+00  1.500e+00  -0.746 0.466203    
## FACTOR1Group 3 -1.280e+00  1.500e+00  -0.853 0.406181    
## FACTOR1Group 4  1.669e-15  1.500e+00   0.000 1.000000    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.372 on 16 degrees of freedom
## Multiple R-squared:  0.07465,    Adjusted R-squared:  -0.09885 
## F-statistic: 0.4302 on 3 and 16 DF,  p-value: 0.7342
# One-row table of fit indices: AIC, AICc, BIC, R2, adjusted R2, RMSE, sigma.
model_performance(MY.MODEL)
## # Indices of model performance
## 
## AIC    |    AICc |     BIC |    R2 | R2 (adj.) |  RMSE | Sigma
## --------------------------------------------------------------
## 96.849 | 101.135 | 101.828 | 0.075 |    -0.099 | 2.122 | 2.372
# Diagnostic checks of the fitted model. With only one model term the
# multicollinearity check cannot be run and a message is emitted instead.
check_model(MY.MODEL)
## Not enough model terms in the conditional part of the model to check for
##   multicollinearity.

# Joint F-test for each model term (df1, df2, F ratio, p-value).
joint_tests(MY.MODEL)
##  model term df1 df2 F.ratio p.value
##  FACTOR1      3  16   0.430  0.7342
# Formatted regression table: coefficient, 95% confidence interval and
# p-value for each level of FACTOR1 relative to the reference level.
tbl_regression(MY.MODEL)
Characteristic Beta 95% CI¹ p-value
FACTOR1
    Group 1 — —
    Group 2 -1.1 -4.3, 2.1 0.5
    Group 3 -1.3 -4.5, 1.9 0.4
    Group 4 0.00 -3.2, 3.2 >0.9
¹ CI = Confidence Interval
# Estimated marginal mean, standard error and confidence limits for each
# level of FACTOR1.
emmeans(MY.MODEL, specs = "FACTOR1")
##  FACTOR1 emmean   SE df lower.CL upper.CL
##  Group 1   4.94 1.06 16     2.69     7.19
##  Group 2   3.82 1.06 16     1.57     6.07
##  Group 3   3.66 1.06 16     1.41     5.91
##  Group 4   4.94 1.06 16     2.69     7.19
## 
## Confidence level used: 0.95
# All pairwise differences between the FACTOR1 estimated marginal means,
# reported with both confidence intervals and tests (infer = TRUE).
# adjust = "none" means no multiple-comparison adjustment is applied.
pairs(
  emmeans(MY.MODEL, specs = "FACTOR1"),
  adjust = "none",
  infer = TRUE
)
##  contrast          estimate  SE df lower.CL upper.CL t.ratio p.value
##  Group 1 - Group 2     1.12 1.5 16    -2.06     4.30   0.746  0.4662
##  Group 1 - Group 3     1.28 1.5 16    -1.90     4.46   0.853  0.4062
##  Group 1 - Group 4     0.00 1.5 16    -3.18     3.18   0.000  1.0000
##  Group 2 - Group 3     0.16 1.5 16    -3.02     3.34   0.107  0.9164
##  Group 2 - Group 4    -1.12 1.5 16    -4.30     2.06  -0.746  0.4662
##  Group 3 - Group 4    -1.28 1.5 16    -4.46     1.90  -0.853  0.4062
## 
## Confidence level used: 0.95

Linear model with two predictors and interaction

# Refit MY.MODEL with both main effects and their interaction, then run a
# joint F-test for each term.
MY.MODEL <- lm(
  formula = NUMERICAL_VARIABLE2 ~ FACTOR2 + FACTOR3 + FACTOR2:FACTOR3,
  data = EXAMPLE_DATA
)
joint_tests(MY.MODEL)
##  model term      df1 df2 F.ratio p.value
##  FACTOR2           1  16   0.417  0.5275
##  FACTOR3           1  16   0.060  0.8103
##  FACTOR2:FACTOR3   1  16   0.579  0.4579
# Interaction plot of the estimated marginal means: FACTOR3 on the x-axis,
# one trace per level of FACTOR2, with confidence intervals drawn.
emmip(MY.MODEL, formula = FACTOR2 ~ FACTOR3, CIs = TRUE)